# Purpose: Create histograms (one panel per quarter) of the share of each stack-month that is
#          reported, to examine intraday imputation needs.

# Load the Stata (.dta) input from the emissions data directory. The file name
# suggests a balanced stack-day CEMS panel -- confirm against the data docs.
all_data <- paste0(
  EMISSIONS_DATA_IN,
  "/CEMS_2018-12-31 to 2021-04-03_StackDay_Balanced_HistCFs_Master337s_3sample2trunc.dta"
) %>%
  read_dta()

# Histogram bin width on the 0-1 "share reported" axis. It is also used in the
# figure code to rescale the histogram density into a per-bin share.
BINWIDTH <- 0.05

# Daily records with rows flagged by D_interregnum removed, plus month/quarter
# labels and the share of the day (out of 60 * 24 minutes) that was reported.
subset_data <- all_data %>%
  filter(!D_interregnum) %>%
  mutate(
    Month = format(date, "%Y-%m"),
    Quarter = str_c(format(date, "%Y "), quarters(date)),
    # Missing reporting minutes are counted as zero minutes reported.
    tot_report_mins = if_else(is.na(tot_report_mins), 0, tot_report_mins),
    portion_of_day_reported = tot_report_mins / (60 * 24)
  )


# Collapse the daily records to one row per composite_id and month: the mean of
# the daily reported share within that month. Quarter is kept as a grouping key
# so that it survives the summary and can be used for faceting later.
# (composite_id is presumably the stack identifier -- confirm.)
monthly_data <- subset_data %>%
  group_by(composite_id, Quarter, Month) %>%
  summarize(
    portion_of_month_reported = mean(portion_of_day_reported, na.rm = TRUE),
    # Return an ungrouped result directly; this also avoids the
    # "grouped output by ..." message that summarize() prints otherwise.
    .groups = "drop"
  )

# Share of stack-months that need intra-day imputation, i.e. the monthly
# reported share is neither exactly 0 nor exactly 1: = 72.7%
monthly_data %>%
  summarise(test = mean(!portion_of_month_reported %in% c(0, 1)))

# Share of stack-months with full data: 3%
monthly_data %>%
  summarise(test = mean(portion_of_month_reported == 1))

# Share of stack-months with no data: 25%
monthly_data %>%
  summarise(test = mean(portion_of_month_reported == 0))

# Among partially reported stack-months, the average share that has to be
# imputed = 18.7%
monthly_data %>%
  filter(portion_of_month_reported > 0, portion_of_month_reported < 1) %>%
  summarise(test = mean(1 - portion_of_month_reported))

# Among stack-months with at least some data, the average share that has to be
# imputed = 18%
monthly_data %>%
  filter(portion_of_month_reported > 0) %>%
  summarise(test = mean(1 - portion_of_month_reported))
  
# Figure C1: one histogram panel per quarter showing how much of the month each
# partially reporting stack-month was reported.
monthly_data %>%
  # Keep only stack-months strictly between "nothing reported" and "everything
  # reported"; these are the ones handled by intra-day imputation (all-zero
  # cases are handled by inter-day imputation instead).
  filter(portion_of_month_reported > 0 & portion_of_month_reported < 1) %>%
  # density * bin width turns each bar height into the share of observations
  # that fall in that bin.
  ggplot(aes(x = portion_of_month_reported, y = after_stat(density) * BINWIDTH)) +
  geom_histogram(
    boundary = 1,
    binwidth = BINWIDTH,
    fill = "dodgerblue",
    color = "dodgerblue3",
    # `linewidth` is the line-width argument since ggplot2 3.4.0; `size` is
    # deprecated for lines and triggers a warning.
    linewidth = 0.3
  ) +
  scale_x_continuous(n.breaks = 6) +
  scale_y_continuous(n.breaks = 5, limits = c(0, 1)) +
  facet_wrap(~Quarter) +
  theme_classic() +
  # Disabled label suffix: "\n(excluding stack-days with no reporting or perfect reporting)"
  ylab("Share of Quarter's Stack-Months in Reporting Bin") +
  # Disabled label suffix: "\n(calculated at the minute level)"
  xlab("Share of the Month the Stack is Reporting") +
  #labs(caption="Excluding all days during interregnums\nExcluding stack-days in which report entire day or report none of day.")+
  # Draw the bottom axis line by hand in every panel; the theme's own
  # axis.line.x is blanked below.
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf, linewidth = 1) +
  theme(
    axis.line.x = element_blank(),
    panel.grid.major.y = element_line(
      colour = "grey93",
      linewidth = 0.3,
      linetype = "solid"
    ),
    panel.grid.minor = element_blank(),
    strip.background = element_blank()
  )

# No `plot` argument is given, so ggsave() writes the most recent plot
# (ggplot2::last_plot()), i.e. the figure built above.
ggsave(paste0(EMISSIONS_FIGS, "/Figure_C1.pdf"), width = 8, height = 6, units = "in")



